{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "import matplotlib\n",
    "from matplotlib import pyplot as plt\n",
    "%matplotlib inline"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>id</th>\n",
       "      <th>text</th>\n",
       "      <th>anger</th>\n",
       "      <th>anticipation</th>\n",
       "      <th>disgust</th>\n",
       "      <th>fear</th>\n",
       "      <th>joy</th>\n",
       "      <th>love</th>\n",
       "      <th>optimism</th>\n",
       "      <th>pessimism</th>\n",
       "      <th>sadness</th>\n",
       "      <th>surprise</th>\n",
       "      <th>trust</th>\n",
       "      <th>neutral</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>He was answering a question about the criticis...</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>I'm going to start today's discussion thread w...</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>2</td>\n",
       "      <td>2</td>\n",
       "      <td>By announcing the 395 self-quarantined, it pai...</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>3</td>\n",
       "      <td>3</td>\n",
       "      <td>Likewise, sorry if I offended you. I’m not act...</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>4</td>\n",
       "      <td>4</td>\n",
       "      <td>People infected by experience high fever, coug...</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   id                                               text  anger  anticipation  \\\n",
       "0   0  He was answering a question about the criticis...      1             0   \n",
       "1   1  I'm going to start today's discussion thread w...      1             1   \n",
       "2   2  By announcing the 395 self-quarantined, it pai...      1             1   \n",
       "3   3  Likewise, sorry if I offended you. I’m not act...      1             0   \n",
       "4   4  People infected by experience high fever, coug...      0             0   \n",
       "\n",
       "   disgust  fear  joy  love  optimism  pessimism  sadness  surprise  trust  \\\n",
       "0        1     0    0     0         0          1        0         0      0   \n",
       "1        1     1    0     0         0          1        0         0      0   \n",
       "2        1     1    0     0         0          1        0         0      0   \n",
       "3        1     1    0     0         0          1        0         0      0   \n",
       "4        0     0    0     0         0          0        0         0      0   \n",
       "\n",
       "   neutral  \n",
       "0        0  \n",
       "1        0  \n",
       "2        0  \n",
       "3        0  \n",
       "4        1  "
      ]
     },
     "execution_count": 2,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# read dataset\n",
    "# we will be using trained dataset to understand how people are reacting\n",
    "data = pd.read_csv(\"nlp_train.csv\")\n",
    "data.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>id</th>\n",
       "      <th>anger</th>\n",
       "      <th>anticipation</th>\n",
       "      <th>disgust</th>\n",
       "      <th>fear</th>\n",
       "      <th>joy</th>\n",
       "      <th>love</th>\n",
       "      <th>optimism</th>\n",
       "      <th>pessimism</th>\n",
       "      <th>sadness</th>\n",
       "      <th>surprise</th>\n",
       "      <th>trust</th>\n",
       "      <th>neutral</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <td>count</td>\n",
       "      <td>1493.000000</td>\n",
       "      <td>1493.000000</td>\n",
       "      <td>1493.000000</td>\n",
       "      <td>1493.000000</td>\n",
       "      <td>1493.000000</td>\n",
       "      <td>1493.000000</td>\n",
       "      <td>1493.000000</td>\n",
       "      <td>1493.000000</td>\n",
       "      <td>1493.000000</td>\n",
       "      <td>1493.000000</td>\n",
       "      <td>1493.000000</td>\n",
       "      <td>1493.000000</td>\n",
       "      <td>1493.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>mean</td>\n",
       "      <td>746.000000</td>\n",
       "      <td>0.364367</td>\n",
       "      <td>0.503014</td>\n",
       "      <td>0.454119</td>\n",
       "      <td>0.454119</td>\n",
       "      <td>0.123912</td>\n",
       "      <td>0.092431</td>\n",
       "      <td>0.328198</td>\n",
       "      <td>0.432686</td>\n",
       "      <td>0.277294</td>\n",
       "      <td>0.108506</td>\n",
       "      <td>0.168118</td>\n",
       "      <td>0.113195</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>std</td>\n",
       "      <td>431.136289</td>\n",
       "      <td>0.481413</td>\n",
       "      <td>0.500158</td>\n",
       "      <td>0.498057</td>\n",
       "      <td>0.498057</td>\n",
       "      <td>0.329591</td>\n",
       "      <td>0.289731</td>\n",
       "      <td>0.469715</td>\n",
       "      <td>0.495614</td>\n",
       "      <td>0.447813</td>\n",
       "      <td>0.311123</td>\n",
       "      <td>0.374096</td>\n",
       "      <td>0.316937</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>min</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>25%</td>\n",
       "      <td>373.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>50%</td>\n",
       "      <td>746.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>75%</td>\n",
       "      <td>1119.000000</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>max</td>\n",
       "      <td>1492.000000</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>1.000000</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                id        anger  anticipation      disgust         fear  \\\n",
       "count  1493.000000  1493.000000   1493.000000  1493.000000  1493.000000   \n",
       "mean    746.000000     0.364367      0.503014     0.454119     0.454119   \n",
       "std     431.136289     0.481413      0.500158     0.498057     0.498057   \n",
       "min       0.000000     0.000000      0.000000     0.000000     0.000000   \n",
       "25%     373.000000     0.000000      0.000000     0.000000     0.000000   \n",
       "50%     746.000000     0.000000      1.000000     0.000000     0.000000   \n",
       "75%    1119.000000     1.000000      1.000000     1.000000     1.000000   \n",
       "max    1492.000000     1.000000      1.000000     1.000000     1.000000   \n",
       "\n",
       "               joy         love     optimism    pessimism      sadness  \\\n",
       "count  1493.000000  1493.000000  1493.000000  1493.000000  1493.000000   \n",
       "mean      0.123912     0.092431     0.328198     0.432686     0.277294   \n",
       "std       0.329591     0.289731     0.469715     0.495614     0.447813   \n",
       "min       0.000000     0.000000     0.000000     0.000000     0.000000   \n",
       "25%       0.000000     0.000000     0.000000     0.000000     0.000000   \n",
       "50%       0.000000     0.000000     0.000000     0.000000     0.000000   \n",
       "75%       0.000000     0.000000     1.000000     1.000000     1.000000   \n",
       "max       1.000000     1.000000     1.000000     1.000000     1.000000   \n",
       "\n",
       "          surprise        trust      neutral  \n",
       "count  1493.000000  1493.000000  1493.000000  \n",
       "mean      0.108506     0.168118     0.113195  \n",
       "std       0.311123     0.374096     0.316937  \n",
       "min       0.000000     0.000000     0.000000  \n",
       "25%       0.000000     0.000000     0.000000  \n",
       "50%       0.000000     0.000000     0.000000  \n",
       "75%       0.000000     0.000000     0.000000  \n",
       "max       1.000000     1.000000     1.000000  "
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "#basic stats\n",
    "data.describe()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0    949\n",
       "1    544\n",
       "Name: anger, dtype: int64"
      ]
     },
     "execution_count": 11,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "data['anger'].value_counts()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0    1003\n",
       "1     490\n",
       "Name: optimism, dtype: int64"
      ]
     },
     "execution_count": 16,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "freqs = {\"anger\":data['anger'].value_counts()[1]\n",
    "anticipation = data['anticipation'].value_counts()[1]\n",
    "data['disgust'].value_counts()[1]\n",
    "data['fear'].value_counts()[1]\n",
    "data['joy'].value_counts()[1]\n",
    "data['love'].value_counts()[1]\n",
    "data['optimism'].value_counts()[1]\n",
    "data['pessimism'].value_counts()[1]\n",
    "data['sadness'].value_counts()[1]\n",
    "data['surprise'].value_counts()[1]\n",
    "data['trust'].value_counts()[1]\n",
    "data['neutral'].value_counts()[1]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.4"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
